/**
 * @file pybind11.cpp
 *
 * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */
 #include <pybind11/pybind11.h>
 #include <torch/extension.h>

 #include <cstdint>
 #include <limits>

 #include "add_custom_tiling.h"
 #include "aclrtlaunch_add_custom.h"
 
 #include "torch_npu/csrc/core/npu/NPUStream.h"
 // Declared here, defined outside this file. Called by run_add_custom() with the
 // total element count of the input tensor; the returned pointer is forwarded to
 // the add_custom kernel launch as its tiling argument.
 // NOTE(review): ownership and lifetime of the returned AddCustomTiling are not
 // visible from this file - confirm whether the caller is expected to release it.
 extern AddCustomTiling* GenerateAddCustomTiling(uint32_t totalLength);
 namespace my_add {
 /**
  * @brief Enqueue the `add_custom` kernel for two tensors on the current NPU stream.
  *
  * Validates that both inputs agree in shape, dtype, device and memory layout,
  * allocates the result with `at::empty_like(x)`, builds the tiling for the total
  * element count via GenerateAddCustomTiling() and launches the kernel. No explicit
  * stream synchronization is performed here.
  *
  * @param x First input tensor; also determines shape, dtype and layout of the result.
  * @param y Second input tensor; must match @p x in shape, dtype, device and layout.
  * @return A newly allocated tensor `z` that the kernel writes its output into.
  * @throws c10::Error (via TORCH_CHECK) if the inputs are incompatible, if the element
  *         count does not fit in `uint32_t`, or if no tiling could be generated.
  *
  * NOTE(review): the dtypes supported by the kernel and the ownership of the tiling
  * object are not visible from this file - confirm against the kernel and tiling code.
  */
 at::Tensor run_add_custom(const at::Tensor &x, const at::Tensor &y)
 {
     TORCH_CHECK(x.sizes() == y.sizes(), "run_add_custom: x and y must have the same shape, got ", x.sizes(),
                 " and ", y.sizes());
     TORCH_CHECK(x.scalar_type() == y.scalar_type(), "run_add_custom: x and y must have the same dtype, got ",
                 x.scalar_type(), " and ", y.scalar_type());
     TORCH_CHECK(x.device() == y.device(), "run_add_custom: x and y must be on the same device, got ", x.device(),
                 " and ", y.device());

     // The launch passes only base pointers plus a tiling built from the element count,
     // so stride information never reaches the kernel. Both inputs must therefore share
     // one dense layout; at::empty_like(x) then gives z that same layout.
     const bool bothContiguous = x.is_contiguous() && y.is_contiguous();
     const bool sameDenseLayout = x.is_non_overlapping_and_dense() && x.strides() == y.strides();
     TORCH_CHECK(bothContiguous || sameDenseLayout,
                 "run_add_custom: x and y must both be contiguous or share the same dense memory layout");

     // numel() is 64-bit; the tiling interface takes uint32_t, so reject counts that
     // would silently wrap instead of describing a shorter buffer to the kernel.
     const int64_t elementCount = x.numel();
     TORCH_CHECK(elementCount <= static_cast<int64_t>(std::numeric_limits<uint32_t>::max()),
                 "run_add_custom: tensor has ", elementCount, " elements, which exceeds the uint32_t tiling limit");

     at::Tensor z = at::empty_like(x);
     if (elementCount == 0) {
         // Nothing to compute; skip tiling generation and the kernel launch entirely.
         return z;
     }

     const uint32_t blockDim = 8;
     auto acl_stream = c10_npu::getCurrentNPUStream().stream(false);
     AddCustomTiling* tiling = GenerateAddCustomTiling(static_cast<uint32_t>(elementCount));
     TORCH_CHECK(tiling != nullptr, "run_add_custom: GenerateAddCustomTiling returned a null tiling");

     // data_ptr() honours the tensor's storage offset (unlike storage().data()), so
     // sliced views hand the kernel the address of their own first element.
     ACLRT_LAUNCH_KERNEL(add_custom)
     (blockDim, acl_stream, x.data_ptr(), y.data_ptr(), z.data_ptr(), tiling);
     return z;
 }
 } // namespace my_add
 
 // Python module definition: exposes my_add::run_add_custom as
 // `add_custom.run_add_custom(x, y)`.
 PYBIND11_MODULE(add_custom, m)
 {
     m.doc() = "add_custom pybind11 interfaces"; // optional module docstring
     // Named arguments allow keyword calls from Python and make help() output useful;
     // positional calls keep working unchanged.
     m.def("run_add_custom", &my_add::run_add_custom, pybind11::arg("x"), pybind11::arg("y"),
           "Run the add_custom kernel on tensors x and y on the current NPU stream and "
           "return a new tensor allocated like x.");
 }
 